library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.3.0
## ✔ tibble 2.0.0 ✔ dplyr 0.7.8
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ readr 1.3.1 ✔ forcats 0.3.0
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'tidyr' was built under R version 3.4.4
## Warning: package 'readr' was built under R version 3.4.4
## Warning: package 'purrr' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'stringr' was built under R version 3.4.4
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.4.4
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Warning: package 'Formula' was built under R version 3.4.4
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:Hmisc':
##
## subplot
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(forcats)
pokes <-read.csv("https://www.dropbox.com/s/i0lwxgv86eaoq4o/pokemon.csv?dl=1")
#Scraped using script I wrote in Python (https://github.com/ayakkala1/Stat-331/blob/master/scrape_smogon.py):
smogon <- read.csv("/Users/ramanyakkala/Stat-331/smogon.csv")
This analysis will judge “strong pokemon” by using Smogon rankings.
“Smogon University, commonly shortened to Smogon, is a website whose content encompasses competitive Pokémon battling. It offers guides on battling strategies for people of different Pokémon knowledge backgrounds. The website was founded by ‘chaos’ (one of the developers of NetBattle) and is a considerably well-known website, visited by competitive Pokémon battling enthusiasts.” - https://bulbapedia.bulbagarden.net/wiki/Smogon
Let’s import the smogon rankings I scraped
rankings <- smogon %>%
filter(formats != "") %>%
rename("Name" = "name")
## Warning: package 'bindrcpp' was built under R version 3.4.4
Let’s do Difference of Means tests of various fighting metrics by Uber vs Not Uber. (https://www.smogon.com/dex/sm/formats/uber/)
pokes %>%
left_join(select(rankings,Name,formats, evos), by = "Name") %>%
mutate(formats = replace_na(formats,"Untiered")) %>%
mutate(Uber = ifelse(formats == "Uber",1,0)) %>%
select_if(is.numeric) %>%
select(-Number) %>%
gather(key = variable, value = value, -Uber) %>%
nest(-variable) %>%
group_by(variable) %>%
mutate(p_value = unlist(map(data, ~t.test(value ~ Uber,na.rm = TRUE,.)$p.value))) %>%
arrange(p_value) %>%
mutate(significant = ifelse(p_value < 0.05,"yes","no"))
## Warning: Column `Name` joining factors with different levels, coercing to
## character vector
## # A tibble: 11 x 4
## # Groups: variable [11]
## variable data p_value significant
## <chr> <list> <dbl> <chr>
## 1 Catch_Rate <data.frame [721 × 2]> 1.32e-22 yes
## 2 Total <data.frame [721 × 2]> 3.93e-16 yes
## 3 Sp_Atk <data.frame [721 × 2]> 5.92e- 9 yes
## 4 Attack <data.frame [721 × 2]> 2.63e- 8 yes
## 5 Speed <data.frame [721 × 2]> 3.64e- 8 yes
## 6 Height_m <data.frame [721 × 2]> 2.46e- 6 yes
## 7 Defense <data.frame [721 × 2]> 2.57e- 6 yes
## 8 Sp_Def <data.frame [721 × 2]> 4.72e- 6 yes
## 9 HP <data.frame [721 × 2]> 6.11e- 6 yes
## 10 Weight_kg <data.frame [721 × 2]> 2.73e- 4 yes
## 11 Generation <data.frame [721 × 2]> 4.20e- 2 yes
Everything is significant, but also we have only two pokemon that are Uber ranking in our Gen 1-6 dataset. This is because most of the Uber’s are mega pokemon, various versions of legendary pokemon (ex: Arceus & Deoxys), or Gen 7.
Let’s look at the OverUsed tier which is the next ranking. This is also the more common format for competitive pokemon.
pokes %>%
left_join(select(rankings,Name,formats, evos), by = "Name") %>%
mutate(formats = replace_na(formats,"Untiered")) %>%
filter(formats != "Uber" ) %>%
mutate(OU = ifelse(formats == "OU",1,0)) %>%
select_if(is.numeric) %>%
select(-Number) %>%
gather(key = variable, value = value, -OU) %>%
#group_by(OU, variable) %>%
nest(-variable) %>%
group_by(variable) %>%
mutate(p_value = unlist(map(data, ~t.test(value ~ OU,na.rm = TRUE,alternative="less",.)$p.value)),
t_value = unlist(map(data, ~t.test(value ~ OU,na.rm = TRUE,alternative="less",.)$statistic))) %>%
arrange(p_value) %>%
mutate(significant = ifelse(p_value < 0.05,"yes","no"))
## Warning: Column `Name` joining factors with different levels, coercing to
## character vector
## # A tibble: 11 x 5
## # Groups: variable [11]
## variable data p_value t_value significant
## <chr> <list> <dbl> <dbl> <chr>
## 1 Total <data.frame [700 × 2]> 1.11e-10 -10.3 yes
## 2 Sp_Def <data.frame [700 × 2]> 2.08e- 5 -5.05 yes
## 3 Defense <data.frame [700 × 2]> 7.14e- 4 -3.67 yes
## 4 Sp_Atk <data.frame [700 × 2]> 1.39e- 3 -3.39 yes
## 5 HP <data.frame [700 × 2]> 1.63e- 3 -3.33 yes
## 6 Speed <data.frame [700 × 2]> 1.68e- 3 -3.30 yes
## 7 Attack <data.frame [700 × 2]> 1.96e- 2 -2.20 yes
## 8 Height_m <data.frame [700 × 2]> 2.42e- 2 -2.09 yes
## 9 Weight_kg <data.frame [700 × 2]> 8.77e- 2 -1.40 no
## 10 Generation <data.frame [700 × 2]> 1.96e- 1 -0.872 no
## 11 Catch_Rate <data.frame [700 × 2]> 10.00e- 1 10.8 no
Seems like all the one sided differences are siginficant, except for Weight & Generation & Base Catch Rate.
Let’s Graph the Differences.
pokes %>%
left_join(select(rankings,Name,formats, evos), by = "Name") %>%
mutate(formats = replace_na(formats,"Untiered")) %>%
filter(formats != "Uber" ) %>%
mutate(OU = ifelse(formats == "OU",1,0)) %>%
select_if(is.numeric) %>%
select(-Number) %>%
group_by(OU) %>%
summarize_all(funs(mean)) %>%
gather(key,value,-OU) %>%
ggplot(aes(x = OU,y = value)) + geom_col() + facet_wrap(~key, scales = "free_y") + ggtitle("Comparisons of fighting metrics of OU vs not OU") + xlab("Non OU vs OU") + ylab("values")
## Warning: Column `Name` joining factors with different levels, coercing to
## character vector
Some fun stuff I was doing with probability of catching a pokemon,before I realized that this lab had to do with finding stronger pokemon.
pokes %>%
keep(is.numeric) %>%
as.matrix() %>%
rcorr(type = "spearman")
## Number Total HP Attack Defense Sp_Atk Sp_Def Speed
## Number 1.00 0.15 0.14 0.13 0.13 0.10 0.10 0.03
## Total 0.15 1.00 0.74 0.70 0.67 0.71 0.75 0.54
## HP 0.14 0.74 1.00 0.57 0.44 0.47 0.50 0.26
## Attack 0.13 0.70 0.57 1.00 0.51 0.33 0.28 0.34
## Defense 0.13 0.67 0.44 0.51 1.00 0.29 0.55 0.06
## Sp_Atk 0.10 0.71 0.47 0.33 0.29 1.00 0.56 0.43
## Sp_Def 0.10 0.75 0.50 0.28 0.55 0.56 1.00 0.30
## Speed 0.03 0.54 0.26 0.34 0.06 0.43 0.30 1.00
## Generation 0.98 0.09 0.11 0.08 0.07 0.06 0.04 0.00
## Height_m -0.04 0.72 0.62 0.59 0.49 0.45 0.51 0.35
## Weight_kg 0.03 0.65 0.58 0.58 0.53 0.33 0.46 0.20
## Catch_Rate -0.08 -0.72 -0.52 -0.51 -0.47 -0.53 -0.55 -0.41
## Generation Height_m Weight_kg Catch_Rate
## Number 0.98 -0.04 0.03 -0.08
## Total 0.09 0.72 0.65 -0.72
## HP 0.11 0.62 0.58 -0.52
## Attack 0.08 0.59 0.58 -0.51
## Defense 0.07 0.49 0.53 -0.47
## Sp_Atk 0.06 0.45 0.33 -0.53
## Sp_Def 0.04 0.51 0.46 -0.55
## Speed 0.00 0.35 0.20 -0.41
## Generation 1.00 -0.08 -0.02 -0.02
## Height_m -0.08 1.00 0.84 -0.54
## Weight_kg -0.02 0.84 1.00 -0.48
## Catch_Rate -0.02 -0.54 -0.48 1.00
##
## n= 721
##
##
## P
## Number Total HP Attack Defense Sp_Atk Sp_Def Speed
## Number 0.0000 0.0001 0.0006 0.0004 0.0051 0.0075 0.4954
## Total 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## HP 0.0001 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## Attack 0.0006 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## Defense 0.0004 0.0000 0.0000 0.0000 0.0000 0.0000 0.0937
## Sp_Atk 0.0051 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## Sp_Def 0.0075 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## Speed 0.4954 0.0000 0.0000 0.0000 0.0937 0.0000 0.0000
## Generation 0.0000 0.0165 0.0042 0.0251 0.0492 0.1161 0.2363 0.9440
## Height_m 0.3295 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## Weight_kg 0.4682 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## Catch_Rate 0.0420 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## Generation Height_m Weight_kg Catch_Rate
## Number 0.0000 0.3295 0.4682 0.0420
## Total 0.0165 0.0000 0.0000 0.0000
## HP 0.0042 0.0000 0.0000 0.0000
## Attack 0.0251 0.0000 0.0000 0.0000
## Defense 0.0492 0.0000 0.0000 0.0000
## Sp_Atk 0.1161 0.0000 0.0000 0.0000
## Sp_Def 0.2363 0.0000 0.0000 0.0000
## Speed 0.9440 0.0000 0.0000 0.0000
## Generation 0.0260 0.5234 0.5972
## Height_m 0.0260 0.0000 0.0000
## Weight_kg 0.5234 0.0000 0.0000
## Catch_Rate 0.5972 0.0000 0.0000
legendary <- c("Articuno","Zapdos","Moltres","Mewtwo","Mew","Raikou",
"Entei","Suicune","Lugia","Ho-Oh","Latias","Latios","Kyogre",
"Groudon","Rayquaza","Jirachi","Deoxys","Regirock","Regice",
"Registeel","Uxie","Mesprit","Azelf","Dialga","Palkia","Heatran",
"Regigigas","Giratina","Cresselia","Phione","Manaphy","Darkrai",
"Shaymin","Arceus","Victini","Cobalion", "Virizion", "Tornadus",
"Thundurus","Reshiram","Zekrom","Landorus","Kyurem","Keldeo",
"Genesec","Xerneas","Yveltal","Zygarde","Diancie","Hoopa","Volcanion")
pokes %>%
filter(!Name %in% legendary) %>%
mutate(types = paste(Type_1,Type_2,sep=":")) %>%
separate_rows(types, sep = ":", convert = FALSE) %>%
filter(types != "") %>%
mutate(Type = fct_reorder(types,Catch_Rate,.fun=mean)) %>%
group_by(Type) %>%
summarise(mean_catch = mean(Catch_Rate)) %>%
mutate(Type = fct_reorder(Type,mean_catch,fun=n,desc=TRUE)) %>%
ggplot(aes(x=Type, y=mean_catch, fill = Type)) + geom_col() + ylab("Average Base Catch Number") +ggtitle("Average Base Catch Number by Type")
# Modified catch rate
capture_k_trials <- function(shake_prob, k){
prob_pass_shake <- (shake_prob-1)/65535
prob_shake_fails <- 1-dbinom(4, size = 4, prob = prob_pass_shake)
prob_one_succ <- 1-(prob_shake_fails)^k
return(prob_one_succ)
}
modified_catch <- function(perc,HP,Catch_Rate){
mod <- ((((3 * HP) - (2 * (HP*(perc/100))))*Catch_Rate)/(3*HP))
return(mod)
}
current = pokes$HP %/% 2
pokes %>%
mutate(mod_catch = ((3 * HP - 2 * current)*Catch_Rate)/(3*HP)) %>%
mutate(shake_prob = 1048560 %/% floor(sqrt(floor(sqrt(16711680%/%mod_catch))))) %>%
mutate(prob_succ = map_dbl(shake_prob,~capture_k_trials(.x,5))) %>%
filter(!Name %in% legendary) %>%
mutate(types = paste(Type_1,Type_2,sep=":")) %>%
separate_rows(types, sep = ":", convert = FALSE) %>%
filter(types != "") %>%
mutate(Type = fct_reorder(types,prob_succ,.fun=mean)) %>%
group_by(Type) %>%
summarise(mean_catch = mean(prob_succ),mean_catch2 = mean(Catch_Rate)) %>%
mutate(Type = fct_reorder(Type,mean_catch,fun=n,desc=TRUE)) %>%
ggplot(aes(x=Type, y=mean_catch, fill = Type)) + geom_col() + ylab("Avg. Prob. of 1 success in 5 throws") + ggtitle("Average Probability of Successful Catch in 5 throws by Pokemon Type")
pokes %>%
filter(Name == "Bagon") %>%
select(HP,Catch_Rate) %>%
pmap_dfr(
.,
~as.list(set_names(
modified_catch(...),
paste0(1:100)
)),
perc = 1:100
) %>%
gather(key = "percent", value="mod_catch") %>%
mutate(shake_prob = 1048560 %/% floor(sqrt(floor
(sqrt(16711680%/%mod_catch))))) %>%
mutate(prob_succ = map_dbl(shake_prob,~capture_k_trials(.x,1))) %>%
mutate(percent = as.integer(percent)) %>%
ggplot(aes(x=percent,y=prob_succ))+geom_point()+ylim(0,0.2) + ggtitle("Probability of Success, the higher the percent the Pokemon (Bagon)") + xlab("Percent of Health of Pokemon") + ylab("Probability of Success")
Bagon <- list()
for(i in 1:100) {
Bagon[[i]] <- list(visible = FALSE,
name = paste0('Percent of health missing: ',i),
x=1:20,
y= as.list(pokes %>%
filter(Name == "Cacturne") %>%
mutate(mod_catch = modified_catch(i, HP, Catch_Rate)) %>%
mutate(shake_prob = 1048560 %/% floor(sqrt(floor
(sqrt(16711680%/%mod_catch))))) %>%
select(shake_prob) %>%
pmap_dfr(
.,
~as.list(set_names(
capture_k_trials(...),
paste0(1:20)
)),
k = 1:20
) %>%
gather(key = "tries",value="chance") %>%
select(chance)))
}
Bagon[100][[1]]$visible = TRUE
steps <- list()
p <- plot_ly()
for (i in 100:1) {
p <- add_lines(p,x=Bagon[i][[1]]$x, y=Bagon[i][[1]]$y$chance, visible = Bagon[i][[1]]$visible,
name = Bagon[i][[1]]$name, type = 'scatter', mode = 'lines', hoverinfo = 'name',
line=list(color='00CED1'), showlegend = FALSE) %>%
layout(xaxis = list(title = 'K tries'),
yaxis = list(title = 'Percent of Success'),
legend = list(x = 0.80, y = 0.90))
step <- list(args = list('visible', rep(FALSE, length(Bagon))),
method = 'restyle')
step$args[[2]][i] = TRUE
steps[[i]] = step
}
p <- p %>%
layout(sliders = list(list(active = 100,
currentvalue = list(prefix = "Percentage Health Missing: "),
steps = steps))
)
p
format_poke_graph <- function(i){
return(list(method = "restyle",
args = list("transforms[0].value", unique(test$Name)[i]),
label = unique(test$Name)[i]))
}
test <- pokes %>%
mutate(mod_catch = modified_catch(50, HP, Catch_Rate)) %>%
mutate(shake_prob = 1048560 %/% floor(sqrt(floor
(sqrt(16711680%/%mod_catch))))) %>%
select(shake_prob) %>%
pmap_dfr(
.,
~as.list(set_names(
capture_k_trials(...),
paste0(1:20)
)),
k = 1:20
) %>%
add_column(Name = pokes$Name) %>%
gather("tries","percent_succ",-Name) %>%
mutate(tries = as.integer(tries))
steps <- list()
p <- plot_ly(data = test, x = ~tries, y = ~percent_succ)
p <- test %>%
plot_ly(
type = 'scatter',
x = ~tries,
y = ~percent_succ,
text = ~Name,
hoverinfo = 'text',
mode = 'line',
transforms = list(
list(
type = 'filter',
target = ~Name,
operation = '=',
value = unique(test$Name)[1]
)
)) %>% layout(
title = "Pick a pokemon",
updatemenus = list(
list(
type = 'dropdown',
active = 0,
buttons = map(1:length(pokes),format_poke_graph)
)))
p